package com.thaiopensource.util;

/**
 * Selectively percent-encodes characters in a URI.
 */
public class UriEncoder
{
  /**
   * Flag to include U+0000 - U+001F.
   */
  static private final int C0_CONTROL = 0x01;
  /**
   * Flag to include U+0020
   */
  static private final int SPACE = 0x02;
/**
   * Flag to include '<', '>', '"'
   */
  static private final int DELIM = 0x04;
  /**
   * Flag to include '{', '}', '|', '\\', '^', U+007E
   */
  static private final int UNWISE = 0x08;
  /**
   * Flag to include U+007F
   */
  static private final int DELETE = 0x10;
  /**
   * Flag to include U+0080 - U+009F
   */
  static private final int C1_CONTROL = 0x20;
  /**
   * Flag to include any non-ASCII character with category Zs, Zl, and Zp
   */
  static private final int NON_ASCII_SEPARATOR = 0x40;
  /**
   * Flag to include any other character with code-point >= U+0080
   */
  static private final int OTHER_NON_ASCII = 0x80;

  static private final int ASCII_CONTROL = C0_CONTROL | DELETE;
  static private final int CONTROL = ASCII_CONTROL | C1_CONTROL;
  static private final int SEPARATOR = NON_ASCII_SEPARATOR | SPACE;
  static private final int ASCII_GRAPHIC_FORBIDDEN = DELIM | UNWISE;
  static private final int ASCII_PRINTABLE_FORBIDDEN = ASCII_GRAPHIC_FORBIDDEN | SPACE;
  static private final int ASCII_FORBIDDEN = ASCII_CONTROL | ASCII_PRINTABLE_FORBIDDEN;
  static private final int NON_ASCII = C1_CONTROL | NON_ASCII_SEPARATOR | OTHER_NON_ASCII;
  static private final int JAVA_URI_FORBIDDEN = CONTROL | SEPARATOR | ASCII_PRINTABLE_FORBIDDEN;
  static private final int URI_FORBIDDEN = ASCII_FORBIDDEN | NON_ASCII;

  static public String encode (final String s)
  {
    return encode (s, JAVA_URI_FORBIDDEN);
  }

  static public String encodeAsAscii (final String s)
  {
    return encode (s, URI_FORBIDDEN);
  }

  static private String encode (final String s, final int flags)
  {
    StringBuffer encoded = null;
    final int len = s.length ();
    for (int i = 0; i < len; i++)
    {
      final char c = s.charAt (i);
      boolean mustEncode;
      switch (c)
      {
        case '<':
        case '>':
        case '"':
          mustEncode = ((flags & DELIM) != 0);
          break;
        case '{':
        case '}':
        case '|':
        case '\\':
        case '^':
        case '`':
          mustEncode = ((flags & UNWISE) != 0);
          break;
        case 0x20:
          mustEncode = ((flags & SPACE) != 0);
          break;
        case 0x7F:
          mustEncode = ((flags & DELETE) != 0);
          break;
        default:
          if (c < 0x20)
            mustEncode = ((flags & C0_CONTROL) != 0);
          else
            if (c < 0x80)
              mustEncode = false;
            else
            {
              switch (flags & NON_ASCII)
              {
                case NON_ASCII:
                  // all non-ASCII chars need to be escaped
                  mustEncode = true;
                  break;
                case 0:
                  // no non-ASCII chars need to be escaped
                  mustEncode = false;
                  break;
                default:
                  if (Character.isISOControl (c))
                    mustEncode = ((flags & C1_CONTROL) != 0);
                  else
                    if (Character.isSpaceChar (c))
                      mustEncode = ((flags & NON_ASCII_SEPARATOR) != 0);
                    else
                      mustEncode = ((flags & OTHER_NON_ASCII) != 0);
                  break;
              }
            }
      }
      if (mustEncode)
      {
        if (encoded == null)
          encoded = new StringBuffer (s.substring (0, i));
        int codePoint;
        if (Utf16.isSurrogate1 (c) && i + 1 < len && Utf16.isSurrogate2 (s.charAt (i + 1)))
          codePoint = Utf16.scalarValue (c, s.charAt (++i));
        else
          codePoint = c;
        encoded.append (percentEncode (Utf8.encode (codePoint)));
      }
      else
        if (encoded != null)
          encoded.append (c);
    }
    if (encoded != null)
      return encoded.toString ();
    return s;
  }

  static private final String hexDigits = "0123456789ABCDEF";

  static char [] percentEncode (final byte [] bytes)
  {
    final char [] buf = new char [bytes.length * 3];
    int j = 0;
    for (final byte b : bytes)
    {
      buf[j++] = '%';
      buf[j++] = hexDigits.charAt ((b >> 4) & 0xF);
      buf[j++] = hexDigits.charAt (b & 0xF);
    }
    return buf;
  }
}
